package com.wu.project

import org.apache.spark.sql.SparkSession

/**
  * Cleaning step for the raw access log.
  *
  * 1. Extracts only the columns we need from every log line (time, url,
  *    traffic, ip) and writes them out as tab-separated text.
  *
  * Usage: `SparkStatFormatJob [inputPath [outputPath]]`. Any argument that is
  * omitted falls back to the corresponding default path defined below, so
  * running with no arguments behaves as before.
  */
object SparkStatFormatJob {

  /** Raw access log that is read when no input path is passed on the command line. */
  private val DefaultInputPath = "E://ideaWorkSpace2018.4.23//Spark-sql//Sources//access.log"

  /** Output directory that is written when no output path is passed on the command line. */
  private val DefaultOutputPath = "E://ideaWorkSpace2018.4.23//Spark-sql//Sources//newproject"

  /** The highest field index read from a split line is 11, so a usable line has at least 12 fields. */
  private val MinFieldCount = 12

  /**
    * Runs the cleaning job.
    *
    * @param args optional `inputPath` (index 0) and `outputPath` (index 1)
    */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    val spark = SparkSession.builder().appName("SparkStatFormatJob").master("local[10]").getOrCreate()

    try {
      spark.sparkContext
        .textFile(inputPath)
        .map(_.split(" "))
        // Blank or truncated lines do not carry every field we index below;
        // drop them instead of failing the whole job with an
        // ArrayIndexOutOfBoundsException.
        .filter(_.length >= MinFieldCount)
        .map(formatFields)
        .saveAsTextFile(outputPath)
    } finally {
      // Release the session even when reading, parsing or writing fails.
      spark.close()
    }
  }

  /**
    * Builds one output record from the space-separated fields of a log line.
    *
    * Field 0 is used as the ip, fields 3 and 4 (re-joined with a space) as the
    * time, field 9 as the traffic and field 11, with double quotes removed, as
    * the url. The time is passed through `DateUtils.parse`, which is defined
    * elsewhere in the project.
    *
    * @param fields a log line split on single spaces; must contain at least
    *               `MinFieldCount` elements
    * @return the record as "time\turl\ttraffic\tip"
    */
  private def formatFields(fields: Array[String]): String = {
    val ip = fields(0)
    val time = s"${fields(3)} ${fields(4)}"
    val url = fields(11).replaceAll("\"", "")
    val traffic = fields(9)

    s"${DateUtils.parse(time)}\t$url\t$traffic\t$ip"
  }
}
